#' ---
#' title: "Analyze final 2021: Learning from Polls"
#' author: "Lukas F. Stoetzer"  changes: "Richard Traunmueller"
#' date: "October 2021"
#' ---

# Libraries
  library(tidyverse)
  library(xtable)
  require(MASS)
  
# Seed
  set.seed(365235923)
  
# Source Code to estimate models
  source("FUN_ElicitedBeliefsMLE.R")
  source("FUN_Estimation.R")
  
# Prepare Data =========
  
  # Load 
  dat <- read.csv("data_exp_normal.csv",row.names = 1)
  
  
  # Descriptive Stats
  dat$university <- ifelse(dat$educ=="University degree", 1, 0)
  dat$polint <- ifelse(dat$polint=="Very interested", 1, 0)
  # Summary Statistics
  tab_sum <- dat %>% 
    dplyr::select(female, age, university, polint) %>% # select variables to summarise
    summarise(across(everything(), .f = list(mean = mean,
                                             min = min,
                                             median = median,
                                             max = max, 
                                             sd = sd), na.rm = T)) %>% 
    gather(stat, val) %>%
    separate(stat, into = c("var", "stat"), sep = "_") %>%
    spread(stat, val) %>%
    dplyr::select(var, min, median, max, mean, sd) # reorder columns
  
  xtable(tab_sum)
  
  # Balance
  balance.test.2 <- dat %>% 
    group_by(reverse,symmetric) %>% 
    summarize(N=n(), 
              female=mean(female, na.rm=T), 
              age=mean(age, na.rm=T), 
              educ=mean(university, na.rm=T), 
              polint=mean(polint, na.rm=T))
  
  xtable(balance.test.2)
  
  # Prepare Long Format for different scenarios
  dat_long <- dat %>%
    mutate(id = row_number()) %>%
    rename(beleif.0.ex = prior.ex,
           beleif.0.lo = prior.lo,
           beleif.0.up = prior.up,
           beleif.0.pl = prior.pl,
           beleif.0.pm = prior.pm) %>%
    gather(meas,val,-id,-symmetric,-reverse,-female,-age,-educ,-polint,-university) %>%
    tidyr::separate(meas,c("var","time","type")) %>%
    spread(type,val) %>%
    dplyr::select(id,time,symmetric,reverse,ex,lo,up,pl,pm,female,age,educ,polint,university) %>%
    mutate(check = lo<=ex & ex<=up) %>%
    arrange(id,time,symmetric,reverse)

  # Prepare Long Format for different scenarios
  dat_long <- dat_long %>%
    group_by(id) %>%
    summarise("check_all"=all(check)) %>%
    right_join(.,dat_long) %>% 
    filter(check_all)

  # Add Poll 
  dat_long <- mutate(dat_long,
      poll = case_when(
        time == 1 & reverse == 1 ~ 58,
        time == 2 & reverse == 1 ~ 54,
        time == 3 & reverse == 1 ~ 51,
        time == 1 & reverse == 0 ~ 51,
        time == 2 & reverse == 0 ~ 54,
        time == 3 & reverse == 0 ~ 58,
        TRUE ~ NaN,
      )
    )
  
    
# Estimate Model non-dynamic ============
  
  # Estimate Model for each Scenario and Point in Time
  res_est <- dat_long %>%
    group_by(symmetric,reverse, time) %>%
    do(est_mod_time(.))
  
  # Calculate Log-likelihood
  log_lik_m1 <- res_est %>% 
    group_by(symmetric,reverse) %>%
    summarise("log_lik" = sum(log_lik)) %>%
    mutate("model" = "separate_each_time",
           "par" = 4*3)
  
  # Calculate Log-likelihood
  res_est <- res_est %>% dplyr::select(-log_lik) 
  
  # Plot Results independent model (Intervals) 
  
  # Get Intervals
  df_est <- res_est %>%
    mutate(lower_in = qbeta(0.05,alpha,beta),
           upper_in = qbeta(0.95,alpha,beta),
           lower_in2 = qbeta(0.01,alpha,beta),
           upper_in2 = qbeta(0.99,alpha,beta),
           mean  =alpha/(alpha+beta))
  
  # Prepare Data for Plot
  df_est$symmetric <-  factor(df_est$symmetric, levels = c(0,1), labels=c("Clear Race Prior","Competitive Race Prior"))
  df_est$reverse <-  factor(df_est$reverse, levels = c(0,1), labels=c("Increasing Polls", "Decreasing Polls"))
  df_est$time <-  factor(df_est$time, levels = c(0,1,2,3), labels=c("Prior", "1. Poll","2. Poll", "3. Poll"))
  
  # Plot Data
  ggplot() +
    geom_linerange(data=df_est,aes(x=time,ymin=lower_in,ymax=upper_in,group=time), size=3,alpha=0.7) +
    geom_linerange(data=df_est,aes(x=time,ymin=lower_in2,ymax=upper_in2,group=time), size=3,alpha=0.4) +
    geom_point(data=df_est,aes(x=time,y=mean), size=3.5) +
    facet_grid(reverse ~ symmetric) +
    scale_color_grey() +
    theme_light() +  xlab("") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          # panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  # Save Plot Results
  ggsave("fig_beliefsint.pdf",width=12,height = 6)

  # Plot Probability: Calculate mean
  
  # Polls 
  df_polls <- data.frame("reverse"=c(0,0,0,1,1,1),
                          "time"=as.character(c(1,2,3,1,2,3)),
                          "poll"=c(51,54,58,58,54,51)/100)
  
  # Recode to numeric values
  df_polls$reverse <- as.numeric(df_polls$reverse)-1
  df_polls$time <- as.character(as.numeric(df_polls$time))
  df_polls$poll <-  df_polls$poll.y
  

  # Get Intervals
  df_estprob <- res_est %>%
    mutate(prob.cand.win = 1-pbeta(0.5,alpha,beta))
  
  # Prepare Data for Plot
  df_estprob$symmetric <-  factor(df_estprob$symmetric, levels = c(0,1), labels=c("Non-symmetric Priors", "Symmetric Priors"))
  df_estprob$reverse <-  factor(df_estprob$reverse, levels = c(0,1), labels=c("Increasing Polls", "Decreasing Polls"))
  df_estprob$timeval <-  factor(df_estprob$time, levels = c(0,1,2,3), labels=c("Prior", "1. Poll","2. Poll", "3. Poll"))
  
  # Plot Data
  ggplot() +
    geom_point(data=df_estprob,aes(x=timeval,y=(prob.cand.win), col=symmetric), size=3.5) +
    geom_path(data=df_estprob,aes(x=timeval,y=(prob.cand.win), group=symmetric, col=symmetric), size=1) +
    facet_grid(~reverse) +
    scale_color_grey() +
    theme_light() + 
    ylab("Prob. Party A wins the Race") + xlab("") +
    theme(text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          legend.title=element_blank(),
          # panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  # Save Plot Results
  ggsave("fig_beliefs_prob.pdf",width=12,height = 6)
    
# Estimate Model Dynamic Learning Model ============  
  
  dat_long <- dat_long %>%
    rename(pollY = poll) 
  
  # Estimate Model for different scenarios
  res_dyn_mod_all <-  dat_long %>%
    group_by(symmetric,reverse) %>%
    do(est_mod(.))
  
  # Calculate Log-likelihood
  log_lik_m2 <- res_dyn_mod_all %>% 
    group_by(symmetric,reverse) %>%
    summarise("log_lik" = sum(log_lik)) %>%
    mutate("model" = "fixpriors_dynamic",
           "par" = 3 + 3)
  
  # Exclude Log-liklihood
  res_dyn_mod_all <- res_dyn_mod_all %>% dplyr::select(-log_lik) 
  
  # Calcualte Rate of Adaption
  res_dyn_mod_all[,3] / (res_dyn_mod_all[,3] +  res_dyn_mod_all[,4])
  dat_long$cond <- paste0(dat_long$symmetric,dat_long$reverse, sep="+") 
  
  # Calculate learn Type (Sample from Likelihood)
  df_plot_learn <- do.call("rbind",
          lapply(split(dat_long, dat_long$cond), function(d) {
    
    est <- est_mod(d, ret = "all")
    S <- mvrnorm(3000,est[[1]]$par_raw,est[[1]]$vcov)
    S <- apply(S, 1, FUN = function(p) c(1/(1 + exp(-p[1])),exp(p[2]),exp(p[3])))
    
    # Create Data.frame
    df_plot_learn <- as.data.frame(t(S[1:2,]))
    names(df_plot_learn) <- c("delta","rho")
    df_plot_learn$d <- df_plot_learn$delta / (df_plot_learn$delta + df_plot_learn$rho)
    df_plot_learn$symmetric <- ifelse(unique(d$symmetric)==1,"Competitive Race","Clear Race")
    df_plot_learn$reverse <- ifelse(unique(d$reverse)==1,"Decreasing Polls","Increasing Polls")
    
    return(df_plot_learn)
    
  }))

  
  # Plot Learn type
  df_plot_learn$cond  <- paste(df_plot_learn$symmetric,
                                df_plot_learn$reverse,sep = " + ")
  
  # Plot estimates
  df_plot_all <- df_plot_learn %>% 
    group_by(cond) %>%
    summarise(d_mid = mean(d),
              d_low = quantile(d,0.025),
              d_high= quantile(d, 0.975),
              delta_mid = mean(delta),
              delta_low = quantile(delta,0.025),
              delta_high = quantile(delta, 0.975),
              rho_mid = mean(rho),
              rho_low = quantile(rho,0.025),
              rho_high = quantile(rho, 0.975)
              ) %>%
    gather(var,val,-cond) %>%
    separate(var, c("est","lvl")) %>%
    spread(lvl,val) %>%
    mutate(est = factor(est, levels = c("d","delta","rho"),
                        labels = c("Standardized Rate of Adaptation",
                                   "Prior Stickiness Rate",
                                   "Sample Scale Factor")))
  
  ggplot(df_plot_all) +
    geom_pointrange(aes(x=cond, y=mid, ymin=low, ymax=high)) +
    coord_flip() +
    facet_wrap(~ est, ncol=1) +
    theme_bw() + ylab("") + xlab("") +
    ylim(c(0,1)) +
    theme(text = element_text(size=20), 
          strip.text.y = element_text(angle = 360),
          legend.position = "none",
          legend.title = element_blank(),
          # panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
  
  ggsave("fig_estimates_all.pdf",width=9,height = 5)
  
  
  # Plot Beliefs
  polls <- data.frame(reverse=c(0,1),
                      "poll.1"=c(.51,.58),
                      "poll.2"=c(.54,.54),
                      "poll.3"=c(.58,.51)
  )
  
  # Calcualte beliefs
  df_est_dyn <- left_join(res_dyn_mod_all, polls)  %>% 
    group_by(reverse, symmetric) %>%
    do(calc_beliefs(.$delta,.$rho,
                    .$alpha0,.$beta0, 
                    polls = c(.$poll.1,.$poll.2,.$poll.3))) %>%
    mutate(lower_in = qbeta(0.05,alpha,beta),
           upper_in = qbeta(0.95,alpha,beta),
           lower_in2 = qbeta(0.01,alpha,beta),
           upper_in2 = qbeta(0.99,alpha,beta),
           mean  =alpha/(alpha+beta))
  
  # Prepare Data for Plot
  df_est_dyn$symmetric <-  factor(df_est_dyn$symmetric, levels = c(0,1), labels=c("Clear Race Prior","Competitive Race Prior"))
  df_est_dyn$reverse <-  factor(df_est_dyn$reverse, levels = c(0,1), labels=c("Increasing Polls", "Decreasing Polls"))
  df_est_dyn$time <-  factor(df_est_dyn$t, levels = c(0,1,2,3), labels=c("Prior", "1. Poll","2. Poll", "3. Poll"))
  
  # Plot Data
  ggplot() +
    geom_linerange(data=df_est_dyn,aes(x=time,ymin=lower_in,ymax=upper_in,group=time), size=3,alpha=0.7) +
    geom_linerange(data=df_est_dyn,aes(x=time,ymin=lower_in2,ymax=upper_in2,group=time), size=3,alpha=0.4) +
    geom_point(data=df_est_dyn,aes(x=time,y=mean), size=3.5) +
    facet_grid(reverse ~ symmetric) +
    scale_color_grey() +
    theme_light() +  xlab("") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          # panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  ggsave("fig_beliefsint_dyn.pdf",width=12,height = 6)
  
  
  
# Model Fit Comparison ============
  
  
  df_est_cbn <- bind_rows(mutate(df_est, "model"="Time point separately"),
                          mutate(df_est_dyn, "model"="Dynmic Learning model"))
  
 
  ggplot() +
    geom_linerange(data=df_est_cbn,aes(x=time,ymin=lower_in,col=model,
                                       ymax=upper_in,group=model), 
                   position = position_dodge(0.3),
                   size=3,alpha=0.7) +
    geom_linerange(data=df_est_cbn,aes(x=time,ymin=lower_in2,
                                       col=model,ymax=upper_in2,group=model), 
                   position = position_dodge(0.3),
                   size=3,alpha=0.4) +
    geom_point(data=df_est_cbn,aes(x=time,y=mean,shape=model,), 
               position = position_dodge(0.3),size=3.5) +
    facet_grid(reverse ~ symmetric) +
    scale_color_grey() +
    theme_light() +  xlab("") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          # panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )
  
  ggsave("fig_beliefsint_cmp.pdf",width=12,height = 6)
  
  # Numbers
  cases <- dat_long %>%
    group_by(symmetric,reverse) %>%
    summarise("N" = n()/4)
  
  # Log Likelihood
  log_lik <- bind_rows(log_lik_m1,log_lik_m2) %>%
    left_join(.,cases) %>%
    mutate(aic = 2*par - 2 *log_lik,
           bic = par*log(N) - 2*log_lik) 


  # 
  table_modelfit <- log_lik %>% 
    mutate(model = case_when(
      model == "separate_each_time" ~ "independent",
      model == "fixpriors_dynamic" ~ "dynamic",
    )) %>%
    pivot_wider(id_cols = c("symmetric","reverse"),
                names_from = model, 
                values_from = c("log_lik", "par","N","aic","bic")) %>%
    dplyr::select(symmetric,reverse, 
           log_lik_independent,log_lik_dynamic,
           aic_independent,aic_dynamic,
           bic_independent,bic_dynamic) 
  
    xtable::xtable(table_modelfit)
  

    
